Libraries

options(timeout=180)
library(ggspavis)
library(openxlsx)
library(dplyr)
library(scran)
library(scater)
library(TENxVisiumData)
library(SpatialExperiment)
library(SpatialFeatureExperiment)
library(Voyager)
library(patchwork)
library(nnSVG)
library(GSVA)
library(BiocParallel)

Load Data

spe <- MouseBrainSagittalAnterior()
spe <- spe[, colData(spe)$sample_id == "MouseBrainSagittalAnterior1"]

Preprocessing

QC

Subset to keep only spots over tissue

spe <- spe[, int_colData(spe)$spatialData$in_tissue == TRUE]

Identify mitochondrial genes

is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$symbol)
spe <- addPerCellQC(spe, subsets = list(mito = is_mito))

Select QC thresholds sum: Library size (represents the total sum of UMI counts per spot). detected: The number of expressed features (refers to the number of genes with non-zero UMI counts per spot). subsets_mito_percent: A high proportion of mitochondrial reads indicates cell damage.

To select the QC thresholds we first plot them and choose the thresholds visually.

hist(colData(spe)$sum, breaks = 20, col = "skyblue", border = "white", 
     xlab = "UMI Counts per Spot", ylab = "Frequency", 
     main = "Distribution of UMI Counts per Spot")

grid(col = "gray", lwd = 0.5)

hist(colData(spe)$detected, breaks = 20, col = "skyblue", border = "white", 
     xlab = "Number of genes with non-zero UMI counts per spot", ylab = "Frequency", 
     main = "Distribution of number of genes with non-zero UMI counts per spot")

grid(col = "gray", lwd = 0.5)

hist(colData(spe)$subsets_mito_percent, breaks = 20, col = "skyblue", border = "white", 
     xlab = "Percentage of mitochondrial reads", ylab = "Frequency", 
     main = "Distribution of percentage of mitochondrial reads")

grid(col = "gray", lwd = 0.5)

qc_lib_size <- colData(spe)$sum < 2000
qc_detected <- colData(spe)$detected < 1000
qc_mito <- colData(spe)$subsets_mito_percent > 30

Number of discarded spots for each metric

apply(cbind(qc_lib_size, qc_detected, qc_mito), 2, sum)
## qc_lib_size qc_detected     qc_mito 
##          26          23          14
discard <- qc_lib_size | qc_detected | qc_mito
table(discard)
## discard
## FALSE  TRUE 
##  2655    40
colData(spe)$discard <- discard

Plot discarded spots

# check spatial pattern of combined set of discarded spots
plotVisium(spe, x_coord = "pxl_row_in_fullres", y_coord = "pxl_col_in_fullres",
           fill = "discard")

Filter low-quality spots

spe <- spe[, !colData(spe)$discard]

Normalization

Calculate log-transformed normalized counts (abbreviated as “logcounts”) using library size factors.

spe <- computeLibraryFactors(spe)
spe <- logNormCounts(spe)

Feature selection

spe <- spe[!is_mito, ]

Now, we keep only highly variable genes.

# fit mean-variance relationship
dec <- modelGeneVar(spe, assay.type = "logcounts")
# select top HVGs
top_hvgs <- getTopHVGs(dec, prop = 0.9)
spe <- spe[rownames(spe) %in% top_hvgs,]
fit <- metadata(dec)
plot(fit$mean, fit$var, 
     xlab = "mean of log-expression", ylab = "variance of log-expression")
curve(fit$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)

Anatomical regions

Clustering

PCA

Compute PCA

set.seed(123)
spe <- runPCA(spe, subset_row = top_hvgs)

reducedDimNames(spe)
## [1] "PCA"

Perform graph-based clustering

set.seed(123)
k <- 28
g <- buildSNNGraph(spe, k = k, use.dimred = "PCA")
g_walk <- igraph::cluster_walktrap(g)
clus <- g_walk$membership
table(clus)
## clus
##   1   2   3   4   5   6   7   8   9 
## 424 210 320 581 476 314 129  78 123
colLabels(spe) <- factor(clus)

Load Gene Sets

Cellmarker data from http://bio-bigdata.hrbmu.edu.cn/CellMarker/CellMarker_download.html

cellmarkers <- read.xlsx('Cell_marker_Mouse.xlsx', rowNames =F)

Extract Gene Set info from the xlsx file

cell_types <- cellmarkers %>% filter(tissue_class=='Brain' & cell_type=="Normal cell") %>% dplyr::count(cell_name) %>% arrange(desc(n)) %>% filter(n>1) %>% select(cell_name)
cell_types <- cell_types$cell_name


# filter genes symbol info on the xlsx file
cm_sets <- lapply(cell_types, function(x) cellmarkers %>% 
                    filter(tissue_class=='Brain' & 
                             cell_type=="Normal cell" &
                             cell_name==x &
                             !(is.na(Symbol)))  %>% pull(Symbol) %>% unique())

cell_types <- unique(make.names(cell_types))
names(cm_sets) <- cell_types
set_names <- names(cm_sets)

Create list with geneset names from SpatialExperiment object

genesetlist <- lapply(set_names, function(x) {
  geneset <- rownames(rowData(spe)[rowData(spe)$symbol %in% cm_sets[[x]], ,drop=FALSE])
})

names(genesetlist) <- set_names
genesetlist <- genesetlist[sapply(genesetlist, function(x) length(x) > 1)]
# Function  for comparison between GSVA scores and clusters
source("gsva_cluster_tests.R")

Read files

SpatialExperiment object with GSVA scores.

gsvaPar <- gsvaParam(spe, genesetlist, assay = "logcounts", kcdf="none", minSize = 2)
system.time(res <- gsva(gsvaPar,  BPPARAM=MulticoreParam(workers=4)))
## Warning in .filterGenes(dataMatrix, removeConstant = removeConstant,
## removeNzConstant = removeNzConstant): 144 genes with constant non-zero values
## throughout the sample.
## Warning in .filterGenes(dataMatrix, removeConstant = removeConstant,
## removeNzConstant = removeNzConstant): Genes with constant non-zero values are
## discarded.
## Setting parallel calculations through a MulticoreParam back-end with workers=4 and tasks=100.
## Estimating GSVA scores for 144 gene sets.
## Estimating ECDFs directly
## Estimating ECDFs in parallel on 4 cores
##    user  system elapsed 
##  52.940  35.540  41.379

Get spatially variable genesets

GSVA SpatialFeatureExperiment object

We coerce the SpatialExperiment object to SpatialFeatureExperiment object in order to use the package Voyager functionalities.

gsva_sfe <- toSpatialFeatureExperiment(res)
colGraph(gsva_sfe, "visium", sample_id = "MouseBrainSagittalAnterior1") <- findVisiumGraph(gsva_sfe, sample_id = "MouseBrainSagittalAnterior1", zero.policy = TRUE)

Compute Moran’s I for GSVA scores

assays(gsva_sfe)$logcounts <- assays(gsva_sfe)$es
moran_res <- runUnivariate(gsva_sfe, type = "moran", colGraphName = "visium")
rowData(moran_res)[order(rowData(moran_res)$moran_MouseBrainSagittalAnterior1,decreasing = TRUE),][1:10,2:3]
## DataFrame with 10 rows and 2 columns
##                                moran_MouseBrainSagittalAnterior1
##                                                        <numeric>
## Myelinating.stem.cell                                   0.843874
## Striatopallidal.loop.cell                               0.815366
## Vascular.leptomeningeal.cell                            0.814274
## Synaptic.cell                                           0.784577
## Myelinating.oligodendrocyte                             0.771354
## Early.GABAergic.neuron                                  0.760723
## D1.Medium.spiny.neuron.D1.MSN.                          0.756597
## Astrocyte                                               0.749458
## Satellite.glial.cell                                    0.748608
## Oligodendrocyte                                         0.721197
##                                K_MouseBrainSagittalAnterior1
##                                                    <numeric>
## Myelinating.stem.cell                                1.56658
## Striatopallidal.loop.cell                            4.16227
## Vascular.leptomeningeal.cell                         4.82437
## Synaptic.cell                                        5.09456
## Myelinating.oligodendrocyte                          4.28530
## Early.GABAergic.neuron                               3.38674
## D1.Medium.spiny.neuron.D1.MSN.                       6.13580
## Astrocyte                                            4.34787
## Satellite.glial.cell                                 4.28117
## Oligodendrocyte                                     10.92995

Plot Top 10 Spatially Autocorrelated Genesets

top_SVG_genesets <- rownames(rowData(moran_res)[order(rowData(moran_res)$moran_MouseBrainSagittalAnterior1,decreasing = TRUE),][1:10,])
plot_list_hist_gsva <- lapply(top_SVG_genesets, function(x){
  pl <- plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = x, assay = "es",sample_ids=NULL) + ggtitle(paste0(x," GSVA scores superposed in histological image" ))+ theme(text = element_text(size = 7))
  pl
})

Clusters plot

library(RColorBrewer)
cluster_plot <- plotVisium(spe,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = "label", palette = brewer.pal(9, "Set1"))+labs(subtitle=NULL)+ggtitle("Cluster regions")+ theme(text = element_text(size = 7))
for (i in 1:length(plot_list_hist_gsva)){
    print(plot_list_hist_gsva[[i]])
}

Compute NNSVG for GSVA scores

spe_nnSVG <- nnSVG(res, assay_name = "es", BPPARAM=MulticoreParam(workers=4))
rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = TRUE),][1:10,2:15]
## DataFrame with 10 rows and 14 columns
##                                       sigma.sq     tau.sq         phi    loglik
##                                      <numeric>  <numeric>   <numeric> <numeric>
## Disease.associated.microglial.cell 0.005727859 0.03935095 7.36083e+01   352.401
## M1.macrophage                      0.000395995 0.00352343 4.93239e+01  3597.986
## Monocyte.derived.macrophage        0.000187491 0.00343928 7.05622e+00  3708.621
## Early.intermediate.precursor.cell  0.000789102 0.01466384 4.68450e+00  1786.319
## Vascular.endothelial.cell          0.001728764 0.01816830 1.15604e+01  1464.384
## Border.associated.macrophage       0.000530252 0.00316531 3.07233e+01  3700.688
## Late.GABAergic.neuron              0.001024825 0.01441191 6.59090e-05  1806.802
## Hemogenic.endothelial.cell         0.000993726 0.01065153 2.08556e+00  2193.264
## Intermediate.GABAergic.neuron      0.001101066 0.01134260 6.18977e+00  2098.128
## Plasmacytoid.dendritic.cell.pDC.   0.000547286 0.00413581 9.49803e+00  3410.168
##                                      runtime      mean       var     spcov
##                                    <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell     3.008        NA        NA        NA
## M1.macrophage                          2.529        NA        NA        NA
## Monocyte.derived.macrophage            1.523        NA        NA        NA
## Early.intermediate.precursor.cell      1.640        NA        NA        NA
## Vascular.endothelial.cell              2.303        NA        NA        NA
## Border.associated.macrophage           2.050        NA        NA        NA
## Late.GABAergic.neuron                  1.288        NA        NA        NA
## Hemogenic.endothelial.cell             2.674        NA        NA        NA
## Intermediate.GABAergic.neuron          1.920        NA        NA        NA
## Plasmacytoid.dendritic.cell.pDC.       0.570        NA        NA        NA
##                                      prop_sv loglik_lm   LR_stat      rank
##                                    <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 0.1270632   347.042   10.7175       144
## M1.macrophage                      0.1010341  3589.585   16.8027       143
## Monocyte.derived.macrophage        0.0516962  3692.630   31.9816       142
## Early.intermediate.precursor.cell  0.0510649  1768.274   36.0911       141
## Vascular.endothelial.cell          0.0868854  1436.326   56.1164       140
## Border.associated.macrophage       0.1434835  3672.069   57.2372       139
## Late.GABAergic.neuron              0.0663887  1775.391   62.8235       138
## Hemogenic.endothelial.cell         0.0853331  2153.137   80.2558       137
## Intermediate.GABAergic.neuron      0.0884840  2055.406   85.4442       136
## Plasmacytoid.dendritic.cell.pDC.   0.1168643  3353.784  112.7672       135
##                                           pval        padj
##                                      <numeric>   <numeric>
## Disease.associated.microglial.cell 4.70688e-03 4.70688e-03
## M1.macrophage                      2.24568e-04 2.26138e-04
## Monocyte.derived.macrophage        1.13575e-07 1.15175e-07
## Early.intermediate.precursor.cell  1.45517e-08 1.48613e-08
## Vascular.endothelial.cell          6.52367e-13 6.71006e-13
## Border.associated.macrophage       3.72480e-13 3.85878e-13
## Late.GABAergic.neuron              2.27596e-14 2.37491e-14
## Hemogenic.endothelial.cell         0.00000e+00 0.00000e+00
## Intermediate.GABAergic.neuron      0.00000e+00 0.00000e+00
## Plasmacytoid.dendritic.cell.pDC.   0.00000e+00 0.00000e+00

Plot Top 10 Spatially Variable Genesets

top_nnSVG_genesets <- rownames(rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = FALSE),][1:10,])

plot_list_hist_gsva_nnsvg <- lapply(top_nnSVG_genesets, function(x){
  pl <- list(x,plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = x, assay = "es",sample_ids=NULL) +  ggtitle(paste0(x," GSVA scores superposed in histological image" ))+ theme(text = element_text(size = 7)))
  pl
})
for (i in 1:length(plot_list_hist_gsva_nnsvg)){
  print(plot_list_hist_gsva_nnsvg[i][[1]][[2]])
  #print(gsva_clusters_ttests(spe,res,plot_list_hist_gsva_nnsvg[i][[1]][[1]]))
}

Correlation between clusters and genesets

most_corr<-most_cor_geneset(spe,res)
corr_plots <- lapply(1:length(unique(colData(spe)$label)), function(x){
  pl <- plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = most_corr[x], assay = "es",sample_ids=NULL) + labs(subtitle=NULL)+ggtitle(paste0(most_corr[x]," GSVA scores (most correlated with cluster ",x,")" ))+ theme(text = element_text(size = 7))
  })

for (i in 1:length(corr_plots)){
  print(corr_plots[[i]]+cluster_plot)
}